Trying validate model and covariates with a single pig and then scaling up to all pigs at Tejon Ranch.
library(data.table)
library(raster)
library(yaml)
library(glmnet)
library(ggplot2)
Load in rasters and movement model
anal_params = yaml.load_file("analysis_parameters.yml")
dat = fread("~/Repos/rsf_swine/results/glmdata_by_study/tejon.csv")
Read 0.0% of 811256 rows
Read 35.7% of 811256 rows
Read 66.6% of 811256 rows
Read 98.6% of 811256 rows
Read 811256 rows and 77 (of 77) columns from 0.376 GB file in 00:00:06
# rawdat = fread("~/Repos/rsf_swine/data/formatted/full_pig_data.csv")
ras = raster("~/Repos/rsf_swine/data/covariate_data/croplayer/tejon/tejon_fruit_and_nuts_2015.tif")
cras = raster("~/Repos/rsf_swine/data/covariate_data/croplayer/tejon/tejon_cereals_2016.tif")
raswater = raster("~/Repos/rsf_swine/data/covariate_data/water/tejon/tejon_water_nndistance.tif")
rasel = raster("~/Repos/rsf_swine/data/covariate_data/elevation/tejon/tejon_elevation.tif")
rasndvi = raster("~/Repos/rsf_swine/data/covariate_data/ndvi/tejon/tejon_ndvi_4_2016.tif")
rasmast = raster("~/Repos/rsf_swine/data/covariate_data/masting/tejon/tejon_masting.tif")
rascover = raster("~/Repos/rsf_swine/data/covariate_data/landcover//tejon/tejon_canopycover.tif")
shpcrop = shapefile("~/Repos/rsf_swine/data/covariate_data/croplayer/tejon/tejon_fruit_and_nuts_2015.shp")
cshp = shapefile("~/Repos/rsf_swine/data/covariate_data/water/tejon/raw/tejon_water_nwi_raw.shp")
cshp_trans = spTransform(cshp, CRS("+proj=longlat +datum=WGS84"))
# Only consider permanent and semi-permanent water sources: "H" and "F" in NWI lingo: https://www.fws.gov/wetlands/Data/Wetland-Codes.html
peren = cshp_trans[grepl("H", cshp_trans$ATTRIBUTE) | grepl("F", cshp_trans$ATTRIBUTE), ]
unique(dat$pigID)
[1] "tejonF02_F12" "tejonF16" "tejonF17" "tejonF05" "tejonF07" "tejonF04" "tejonF06"
[8] "tejonM301" "tejonM03" "tejonM306" "tejonM302" "tejonM307" "tejonM401"
Visualize the pig movement
tdat = dat[pigID == "tejonM302"]
tdat[ , datetime:=as.POSIXct(t, origin = '1970-01-01', tz = 'GMT')]
plot(crop(rascover, ras))
#plot(ras, col=c("white", "blue"), add=T)
plot(shpcrop, add=T)
plot(peren, add=T, border='blue')
points(tdat[, x.adj], tdat[, y.adj], cex=0.01, col="red", pch=19)
points(tdat[crop_loc == 1, x.adj], tdat[crop_loc == 1, y.adj], cex=0.1, col="gray", pch=19)
#points(tdat[, x.adj], tdat[, y.adj], cex=0.01, col="red", pch=19)
#points(tdat[crop_loc == 1, x.adj], tdat[crop_loc == 1, y.adj], cex=0.05, col="blue", pch=19)
#plot(sp, add=T)
First just look at the effects of movement independent of hour
require(doMC)
source("pigfxns.R")
tdat = dat[pigID == "tejonM302"]
tdat[ , datetime:=as.POSIXct(t, origin = '1970-01-01', tz = 'GMT')]
Xfull = build_design_matrix2(tdat, c("ndvi_loc", "ndvi_grad", "masting_loc",
"masting_grad", "water_grad",
"elevation_loc", "elevation_grad",
"fruit_and_nutsdists_grad",
"canopycover_loc", "canopycover_grad", "crw"),
c("fruit_and_nuts_loc"))
registerDoMC(cores=4)
tfit = cv.glmnet(Xfull, y=tdat$z, offset=log(tdat$tau),
family="poisson",
alpha=1, nlambda=20, nfolds=4, parallel=TRUE)
tfit$glmnet.fit$beta[, which(tfit$lambda == tfit$lambda.min), drop=F]
12 x 1 sparse Matrix of class "dgCMatrix"
s9
fruit_and_nuts_loc -1.13508408
ndvi_loc -0.03737648
ndvi_grad .
masting_loc 0.32589925
masting_grad .
water_grad -0.02177166
elevation_loc -0.58012665
elevation_grad -0.04664426
fruit_and_nutsdists_grad -0.01619372
canopycover_loc -0.49927946
canopycover_grad .
crw 0.96506653
Substantially slower movement once the pig is in a crop field. An overall tendency to move toward crop fields (NOTE: this is highly dependent on the time of day!). A tendency to stay longer in higher cover
Fit a stan model to this data
library(rstan)
poisglm = stan_model("stan_files/poisson_glm.stan")
Xfullstan = cbind(1, Xfull)
sdata = list(X=Xfullstan, N=nrow(Xfullstan), p=ncol(Xfullstan), z=tdat$z, tau=tdat$tau)
fitpglm = sampling(poisglm, data=sdata, chains=3, iter=2000)
Let’s add in daily variation in movement behavior.
tdat[ , datetime:=as.POSIXct(t, origin = '1970-01-01', tz = 'GMT')]
tdat[ , hourofday:=scale(hour(datetime))]
df_hour = 5
stdcols = c("ndvi_loc", "ndvi_grad", "masting_loc",
"masting_grad", "water_grad",
"elevation_loc", "elevation_grad",
"fruit_and_nutsdists_grad",
"canopycover_loc", "canopycover_grad", "crw")
nonstdcols = c("fruit_and_nuts_loc")
splinecols = c("ndvi_grad", "masting_grad", "elevation_grad", "fruit_and_nutsdists_grad",
"canopycover_grad", "water_grad", "crw")
Xgam = build_daily_spline_design_matrix(tdat, stdcols, nonstdcols, splinecols, df_hour=df_hour)
registerDoMC(cores=4)
fitgam = cv.glmnet(Xgam, y=tdat$z, offset=log(tdat$tau), family="poisson",
alpha=1, nlambda=20, nfolds=5, parallel=TRUE)
bestbetas = fitgam$glmnet.fit$beta[, which(fitgam$lambda == fitgam$lambda.min), drop=T]
bestbetas
fruit_and_nuts_loc ndvi_loc masting_loc elevation_loc canopycover_loc
-1.280810173 0.015654860 0.248419324 -0.485084162 -0.388919143
base_hour_1 base_hour_2 base_hour_3 base_hour_4 ndvi_grad_1
0.059019209 0.000000000 -1.875606721 -0.310093840 0.000000000
ndvi_grad_2 ndvi_grad_3 ndvi_grad_4 masting_grad_1 masting_grad_2
0.051172445 0.156166594 0.095426297 -0.006868112 -0.038413037
masting_grad_3 masting_grad_4 elevation_grad_1 elevation_grad_2 elevation_grad_3
0.097975291 -0.001155836 -0.116678032 0.155253611 -0.056037462
elevation_grad_4 fruit_and_nutsdists_grad_1 fruit_and_nutsdists_grad_2 fruit_and_nutsdists_grad_3 fruit_and_nutsdists_grad_4
-0.068425762 -0.253919885 0.659598405 0.256502244 -0.517798945
canopycover_grad_1 canopycover_grad_2 canopycover_grad_3 canopycover_grad_4 water_grad_1
-0.023065962 0.057412865 -0.026700225 -0.018683890 0.042630933
water_grad_2 water_grad_3 water_grad_4 crw_1 crw_2
-0.214278962 -0.006450523 0.126489338 0.938400820 0.868051537
crw_3 crw_4
-0.106011018 0.746115211
Plot the daily effects
library(ggplot2)
splinehour = s(hourofday, bs="cc", k=df_hour)
pred_hours = Predict.matrix(smooth.construct2(splinehour, tdat, NULL), data.frame(hourofday=sort(unique(tdat$hourofday)))) # B-spline basis for hour effect
dailyres = list()
plist = c("base_hour_*", "ndvi_grad_*", "masting_grad_*", "elevation_grad_*", "fruit_and_nutsdists_grad_*",
"canopycover_grad_*", "water_grad_*", "crw_*")
#plist = c("canopycover_loc_hour_*")
for(pattern in plist) {
eff = pred_hours %*% t(t(bestbetas[names(bestbetas) %like% pattern]))
effdt = data.frame(hour=0:23, beta=eff, coef=pattern)
dailyres[[pattern]] = effdt
}
dailyres = do.call(rbind, dailyres)
tplot = ggplot(dailyres) + geom_line(aes(x=hour, y=beta)) + xlab("Hour of day") + ylab("Effect of on pig movement") + theme_bw() + facet_wrap(~coef, scales="free")
print(tplot)
#ggsave("../docs/presentations/images/canopycoverloc.pdf", width=5, height =3)
This is exactly what we would expect based on a empirical exploration of the movement patterns. A strong tendency to move toward the used fruit and nut fields during the evening hours and thi tendency reverses in the in the early morning hours where the pig is moving away from these fields. Because the fields are in low NDVI areas, there is a strong correlation
Fit model to all pigs.
unique(dat$pigID)
allres = list()
nongam = list()
allbestbetas = list()
for(pig in unique(dat$pigID)){
# Extract a format data
tdat = dat[pigID == pig]
cat("Fitting pig", pig, "\n")
tdat[ , datetime:=as.POSIXct(t, origin = '1970-01-01', tz = 'GMT')]
tdat[ , hourofday:=scale(hour(datetime))]
# Set fngrad to 0 if it is NA
if(all(is.na(tdat$fruit_and_nutsdists_grad)))
fngrad = rep(0, nrow(tdat))
else
fngrad = tdat$fruit_and_nutsdists_grad
tdat$fruit_and_nutsdists_grad = fngrad
# Step 1: Fit the non-daily model
Xfull = build_design_matrix2(tdat, c("ndvi_loc", "ndvi_grad", "masting_loc",
"masting_grad", "water_grad",
"elevation_loc", "elevation_grad",
"fruit_and_nutsdists_grad",
"canopycover_loc", "canopycover_grad", "crw"),
c("fruit_and_nuts_loc"))
registerDoMC(cores=4)
tfit = cv.glmnet(Xfull, y=tdat$z, offset=log(tdat$tau),
family="poisson",
alpha=1, nlambda=20, nfolds=4, parallel=TRUE)
nongam[[pig]] = tfit$glmnet.fit$beta[, which(tfit$lambda == tfit$lambda.min), drop=T]
# Step 2: Fit the daily model
stdcols = c("ndvi_loc", "ndvi_grad", "masting_loc",
"masting_grad", "water_grad",
"elevation_loc", "elevation_grad",
"fruit_and_nutsdists_grad",
"canopycover_loc", "canopycover_grad", "crw")
nonstdcols = c("fruit_and_nuts_loc")
splinecols = c("ndvi_grad", "masting_grad", "elevation_grad", "fruit_and_nutsdists_grad",
"canopycover_grad", "water_grad", "crw")
if(pig == "tejonM302")
Xtemp = Xgam
Xgam = build_daily_spline_design_matrix(tdat, stdcols, nonstdcols, splinecols, df_hour=5)
registerDoMC(cores=4)
fitgam = cv.glmnet(Xgam, y=tdat$z, offset=log(tdat$tau), family="poisson",
alpha=1, nlambda=20, nfolds=5, parallel=TRUE)
bestbetas = fitgam$glmnet.fit$beta[, which(fitgam$lambda == fitgam$lambda.1se), drop=T]
allbestbetas[[pig]] = bestbetas
# Step 3: Extract cyclic basis for hour effect
splinehour = s(hourofday, bs="cc", k=df_hour)
pred_hours = Predict.matrix(smooth.construct2(splinehour, tdat, NULL), data.frame(hourofday=sort(unique(tdat$hourofday))))
dailyres = list()
plist = c("base_hour_*", "ndvi_grad_*", "masting_grad_*", "elevation_grad_*", "fruit_and_nutsdists_grad_*",
"canopycover_grad_*", "water_grad_*", "crw_*")
for(pattern in plist) {
eff = pred_hours %*% t(t(bestbetas[names(bestbetas) %like% pattern]))
effdt = data.frame(hour=0:23, beta=eff, coef=pattern)
dailyres[[pattern]] = effdt
}
dailyres = do.call(rbind, dailyres)
dailyres$pigID = pig
cat("Saving", pig, "\n")
allres[[pig]] = dailyres
}
Fitting pig tejonF02_F12
Saving tejonF02_F12
Fitting pig tejonF16
Saving tejonF16
Fitting pig tejonF17
Saving tejonF17
Fitting pig tejonF05
Saving tejonF05
Fitting pig tejonF07
Saving tejonF07
Fitting pig tejonF04
Saving tejonF04
Fitting pig tejonF06
Saving tejonF06
Fitting pig tejonM301
Saving tejonM301
Fitting pig tejonM03
Saving tejonM03
Fitting pig tejonM306
Saving tejonM306
Fitting pig tejonM302
Saving tejonM302
Fitting pig tejonM307
Saving tejonM307
Fitting pig tejonM401
Saving tejonM401
allres_dt = as.data.table(do.call(rbind, allres))
ggplot(allres_dt) + geom_line(aes(x=hour, y=beta, color=pigID)) + xlab("Hour of day") + ylab("Effect on hourly movement rate") + theme_bw() + facet_wrap(~coef, scales="free")
locparams = sapply(allbestbetas, function(x) x[names(x) %like% "*_loc"])
mlocparams = melt(locparams)
colnames(mlocparams) = c("coef", "pig", "value")
ggplot(mlocparams) + geom_boxplot(aes(x=coef, y=value)) + theme_bw()
A consistent tendecy to remain longer in higher NDVI cells. No consistent effect of elevation or masting. Considering how seasonal masting is, this is not altogether surprising. Canopy cover is, a bit more surprisingly much more variable.
nongamparams = do.call(rbind, nongam)
mparams = melt(nongamparams)
colnames(mparams) = c("pig", "coef", "value")
ggplot(mparams) + geom_boxplot(aes(x=coef, y=value)) + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
There are two pigs that seem to be doing some weird things: tejonF07 and tejonF05. Let’s look at these a bit more closely.
require(lubridate)
tF07 = dat[pigID == "tejonF05"]
tF07[, datetime:=as.POSIXct(t, origin = '1970-01-01', tz = 'GMT')]
max(tF07$datetime) - min(tF07$datetime)
plot(tF07$datetime, rep(1, nrow(tF07)))
hist(hour(tF07$datetime))
For these pigs, it does seem that they are moving much more during the day than other pigs in the population. Why is that?
library(ggfortify)
library(ggplot2)
allparams = do.call(rbind, allbestbetas)
# Drop columns that are all 0
not0 = !apply(allparams, 2, function(x) all(x == 0))
redparams = allparams[, not0]
redparams_dt = as.data.frame(redparams)
redparams_dt$pigID = rownames(redparams)
pcares = stats::prcomp(redparams, scale=T)
autoplot(pcares, data = redparams_dt, colour = 'pigID',
loadings = T, loadings.colour = 'blue',
loadings.label = T, loadings.label.size = 2, label=TRUE, label.size=3)
#(pcares$sdev)^2 / sum((pcares$sdev)^2)
#barplot(pcares$rotation[, 2][order(abs(pcares$rotation[, 2]))])
locparams = redparams[ , colnames(redparams) %like% "*_loc"]
for(p in plist){
sump = t(t(rowSums(redparams[, colnames(redparams) %like% p])))
locparams = cbind(sump, locparams)
colnames(locparams)[1] = p
}
locparams = locparams[, !(colnames(locparams) %in% c("crw_*", "base_hour_*"))]
locparams_dt = as.data.frame(locparams)
locparams_dt$pigID = rownames(locparams)
ploc = stats::prcomp(locparams, scale=T)
autoplot(ploc, data = locparams_dt, colour = 'pigID',
loadings = T, loadings.colour = 'blue',
loadings.label = T, loadings.label.size = 2, label=TRUE, label.size=3)
We have considered daily effects of movement, no let’s consider seasonal (i.e. monthly effects on movement). Two approaches for doing this
Predicted seasonal covariates
Likely seasonal covariates
Xseason = build_seasonal_design_matrix(tdat, stdcols, nonstdcols, splinecols, seasonalcols, df_hour=df_hour)
summer fall winter spring
[1,] 1 0 0 0
[2,] 1 0 0 0
[3,] 1 0 0 0
[4,] 1 0 0 0
[5,] 1 0 0 0
[6,] 1 0 0 0
Error in seasonmat[, season] : subscript out of bounds
bestbetas = fitseason$glmnet.fit$beta[, which(fitseason$lambda == fitseason$lambda.min), drop=T]
bestbetas
water_grad_1:spring water_grad_2:spring water_grad_3:spring water_grad_4:spring
0.000000000 0.000000000 0.000000000 0.000000000
water_grad_1:winter water_grad_2:winter water_grad_3:winter water_grad_4:winter
0.000000000 0.000000000 0.000000000 0.000000000
water_grad_1:fall water_grad_2:fall water_grad_3:fall water_grad_4:fall
0.040594515 -0.208389961 0.075296108 0.090820608
water_grad_1:summer water_grad_2:summer water_grad_3:summer water_grad_4:summer
0.000000000 -0.224349345 -0.467104193 0.244472113
ndvi_loc:spring ndvi_loc:winter ndvi_loc:fall ndvi_loc:summer
0.000000000 0.000000000 -0.040763710 0.256240445
ndvi_grad_1:spring ndvi_grad_2:spring ndvi_grad_3:spring ndvi_grad_4:spring
0.000000000 0.000000000 0.000000000 0.000000000
ndvi_grad_1:winter ndvi_grad_2:winter ndvi_grad_3:winter ndvi_grad_4:winter
0.000000000 0.000000000 0.000000000 0.000000000
ndvi_grad_1:fall ndvi_grad_2:fall ndvi_grad_3:fall ndvi_grad_4:fall
0.015108503 0.039760177 0.200690524 0.122868885
ndvi_grad_1:summer ndvi_grad_2:summer ndvi_grad_3:summer ndvi_grad_4:summer
-0.003235823 0.147947903 0.148405816 -0.006732353
masting_loc:spring masting_loc:winter masting_loc:fall masting_loc:summer
0.000000000 0.000000000 0.332703924 0.060485702
masting_grad_1:spring masting_grad_2:spring masting_grad_3:spring masting_grad_4:spring
0.000000000 0.000000000 0.000000000 0.000000000
masting_grad_1:winter masting_grad_2:winter masting_grad_3:winter masting_grad_4:winter
0.000000000 0.000000000 0.000000000 0.000000000
masting_grad_1:fall masting_grad_2:fall masting_grad_3:fall masting_grad_4:fall
-0.040016285 -0.016222331 0.055214070 -0.035491126
masting_grad_1:summer masting_grad_2:summer masting_grad_3:summer masting_grad_4:summer
0.013566022 -0.076122075 0.122760423 0.117177137
fruit_and_nutsdists_grad_1:spring fruit_and_nutsdists_grad_2:spring fruit_and_nutsdists_grad_3:spring fruit_and_nutsdists_grad_4:spring
0.000000000 0.000000000 0.000000000 0.000000000
fruit_and_nutsdists_grad_1:winter fruit_and_nutsdists_grad_2:winter fruit_and_nutsdists_grad_3:winter fruit_and_nutsdists_grad_4:winter
0.000000000 0.000000000 0.000000000 0.000000000
fruit_and_nutsdists_grad_1:fall fruit_and_nutsdists_grad_2:fall fruit_and_nutsdists_grad_3:fall fruit_and_nutsdists_grad_4:fall
-0.287264857 0.681829141 0.241771677 -0.469621767
fruit_and_nutsdists_grad_1:summer fruit_and_nutsdists_grad_2:summer fruit_and_nutsdists_grad_3:summer fruit_and_nutsdists_grad_4:summer
-0.070339777 0.547666876 0.266771894 -0.796233551
fruit_and_nuts_loc elevation_loc canopycover_loc base_hour_1
-1.272343825 -0.465029001 -0.397791335 0.082369122
base_hour_2 base_hour_3 base_hour_4 elevation_grad_1
0.000000000 -1.896707767 -0.315994956 -0.100603955
elevation_grad_2 elevation_grad_3 elevation_grad_4 canopycover_grad_1
0.146299585 -0.024508938 -0.057671454 -0.017052913
canopycover_grad_2 canopycover_grad_3 canopycover_grad_4 crw_1
0.047664304 -0.010897735 -0.011045384 0.939725265
crw_2 crw_3 crw_4 summer
0.863646471 -0.119535231 0.747834212 -0.107865468
fall winter spring
0.000000000 0.000000000 0.000000000
Plot the seasonal location effects
# Which seasons was the pig in?
rsums = apply(Xseason[, names(seasons)], 2, sum)
inseason = names(rsums)[rsums != 0]
# Location effects
locparams = bestbetas[names(bestbetas) %like% "*loc*"]
unqparams = unique(do.call(c, lapply(strsplit(names(locparams), ":"), function(x) x[[1]])))
for(prm in unqparams){
print(locparams[names(locparams) %like% prm])
}
ndvi_loc:spring ndvi_loc:winter ndvi_loc:fall ndvi_loc:summer
0.00000000 0.00000000 -0.04010893 0.13396043
masting_loc:spring masting_loc:winter masting_loc:fall masting_loc:summer
0.0000000 0.0000000 0.2182664 0.0000000
fruit_and_nuts_loc
-0.9162766
elevation_loc
-0.3546265
canopycover_loc
-0.3968152
splinehour = s(hourofday, bs="cc", k=df_hour)
pred_hours = Predict.matrix(smooth.construct2(splinehour, tdat, NULL), data.frame(hourofday=sort(unique(tdat$hourofday)))) # B-spline basis for hour effect
dailyres = list()
plist = c("base_hour_*", "elevation_grad_*","canopycover_grad_*", "crw_*")
#plist = c("canopycover_loc_hour_*")
for(pattern in plist) {
eff = pred_hours %*% t(t(bestbetas[names(bestbetas) %like% pattern]))
effdt = data.frame(hour=0:23, beta=eff, coef=pattern)
dailyres[[pattern]] = effdt
}
dailyres = do.call(rbind, dailyres)
tplot = ggplot(dailyres) + geom_line(aes(x=hour, y=beta)) + xlab("Hour of day") + ylab("Effect of on pig movement") + theme_bw() + facet_wrap(~coef, scales="free")
print(tplot)
splinehour = s(hourofday, bs="cc", k=df_hour)
pred_hours = Predict.matrix(smooth.construct2(splinehour, tdat, NULL), data.frame(hourofday=sort(unique(tdat$hourofday)))) # B-spline basis for hour effect
dailyres = list()
rsums = apply(Xseason[, names(seasons)], 2, sum)
inseason = names(rsums)[rsums != 0]
plist = c("ndvi_grad_*", "masting_grad_*", "fruit_and_nutsdists_grad_*", "water_grad_*")
#plist = c("canopycover_loc_hour_*")
for(pattern in plist) {
dailyres[[pattern]] = list()
seasbetas = bestbetas[names(bestbetas) %like% pattern]
for(season in inseason){
eff = pred_hours %*% t(t(seasbetas[names(seasbetas) %like% season]))
effdt = data.frame(hour=0:23, beta=eff, coef=pattern)
effdt$season = season
dailyres[[pattern]][[season]] = effdt
}
}
seasonres = do.call(rbind, lapply(dailyres, function(x) do.call(rbind, x)))
tplot = ggplot(seasonres) + geom_line(aes(x=hour, y=beta, linetype=season)) + xlab("Hour of day") + ylab("Effect of on pig movement") + theme_bw() + facet_wrap(~coef, scales="free")
print(tplot)
source("pigfxns.R")
allseasbetas = list()
splinelist = list()
for(pig in unique(dat$pigID)){
cat("Fitting", pig, "\n")
tdat = dat[pigID == pig]
tdat[ , datetime:=as.POSIXct(t, origin = '1970-01-01', tz = 'GMT')]
tdat[ , hourofday:=scale(hour(datetime))]
tdat[ , monthofyear:=month(datetime)]
# Set fngrad to 0 if it is NA
if(all(is.na(tdat$fruit_and_nutsdists_grad)))
fngrad = rep(0, nrow(tdat))
else
fngrad = tdat$fruit_and_nutsdists_grad
tdat$fruit_and_nutsdists_grad = fngrad
seasons = list("summer" = c(6, 7, 8), "fall" = c(9, 10, 11),
"winter"= c(12, 1, 2), "spring"=c(3, 4, 5))
df_hour = 5
stdcols = c("ndvi_loc", "ndvi_grad", "masting_loc",
"masting_grad", "water_grad",
"elevation_loc", "elevation_grad",
"fruit_and_nutsdists_grad",
"canopycover_loc", "canopycover_grad", "crw")
nonstdcols = c("fruit_and_nuts_loc")
splinecols = c("ndvi_grad", "masting_grad", "elevation_grad", "fruit_and_nutsdists_grad",
"canopycover_grad", "water_grad", "crw")
seasonalcols = c("fruit_and_nutsdists_grad", "masting_grad", "masting_loc", "ndvi_grad", "ndvi_loc", "water_grad",
"crw")
Xseason = build_seasonal_design_matrix(tdat, stdcols, nonstdcols, splinecols, seasonalcols, df_hour=df_hour)
registerDoMC(cores=4)
fitseason = cv.glmnet(Xseason, y=tdat$z, offset=log(tdat$tau), family="poisson",
alpha=1, nlambda=20, nfolds=5, parallel=TRUE)
bestbetas = fitseason$glmnet.fit$beta[, which(fitseason$lambda == fitseason$lambda.1se), drop=T]
allseasbetas[[pig]] = bestbetas
splinehour = s(hourofday, bs="cc", k=df_hour)
pred_hours = Predict.matrix(smooth.construct2(splinehour, tdat, NULL), data.frame(hourofday=sort(unique(tdat$hourofday)))) # B-spline basis for hour effect
dailyres = list()
rsums = apply(Xseason[, names(seasons)], 2, sum)
inseason = names(rsums)[rsums != 0]
plist = c("ndvi_grad_*", "masting_grad_*", "fruit_and_nutsdists_grad_*", "water_grad_*", "crw_*", "base_hour_*")
#plist = c("canopycover_loc_hour_*")
for(pattern in plist) {
dailyres[[pattern]] = list()
seasbetas = bestbetas[names(bestbetas) %like% pattern]
for(season in inseason){
eff = pred_hours %*% t(t(seasbetas[names(seasbetas) %like% season]))
effdt = data.frame(hour=0:23, beta=eff, coef=pattern)
effdt$season = season
dailyres[[pattern]][[season]] = effdt
}
}
seasonres = do.call(rbind, lapply(dailyres, function(x) do.call(rbind, x)))
seasonres$pigID = pig
splinelist[[pig]] = seasonres
}
Fitting tejonF02_F12
Fitting tejonF16
Fitting tejonF17
Fitting tejonF05
Fitting tejonF07
Fitting tejonF04
Fitting tejonF06
Fitting tejonM301
Fitting tejonM03
Fitting tejonM306
Fitting tejonM302
Fitting tejonM307
Fitting tejonM401
allpigs_seasoncoef = as.data.table(do.call(rbind, splinelist))
allpigs_seasoncoef$season = factor(allpigs_seasoncoef$season, levels=c("winter", "spring", "summer", "fall"))
meanpigs = allpigs_seasoncoef[, list(meanbeta=mean(beta)), by=list(coef, season, hour)]
meanpigs$season = factor(meanpigs$season, levels=c("winter", "spring", "summer", "fall"))
tplot = ggplot(data=NULL) + geom_line(data=allpigs_seasoncoef, aes(x=hour, y=beta, color=pigID), alpha=0.5) + geom_line(data=meanpigs, aes(x=hour, y=meanbeta), size=1) +
xlab("Hour of day") + ylab("Effect of on pig movement") + theme_bw() + facet_wrap(~coef + season, scales="free", nrow=6, ncol=4)
print(tplot)
Plot the variable location coefficients by season
Similar to the previous analysis. We see an increased tendency to linger in areas with high masting tree density in the winter (though this might be driven by very few points). The remainder of the year there is not as much selection for areas with high masting density.
Instead of using a seasonal factor as we do in the previous analysis, let’s replace this seasonal factor with an interaction between monthly temperature and precipitation and our seasonal variables.
registerDoMC(cores=4)
Error in registerDoMC(cores = 4) : could not find function "registerDoMC"
bestbetas = fitseason$glmnet.fit$beta[, which(fitseason$lambda == fitseason$lambda.1se), drop=T]
bestbetas
base_hour_1:temperature_loc:precipitation_loc base_hour_2:temperature_loc:precipitation_loc
0.0509413791 -0.0064986636
base_hour_3:temperature_loc:precipitation_loc base_hour_4:temperature_loc:precipitation_loc
0.5992985349 -0.0698370964
base_hour_1:precipitation_loc base_hour_2:precipitation_loc
0.2879690962 -0.4268504980
base_hour_3:precipitation_loc base_hour_4:precipitation_loc
-0.3028297767 0.0000000000
base_hour_1:temperature_loc base_hour_2:temperature_loc
0.3407091755 -0.7064455969
base_hour_3:temperature_loc base_hour_4:temperature_loc
-0.3899467426 0.0000000000
crw_1:temperature_loc:precipitation_loc crw_2:temperature_loc:precipitation_loc
0.0000000000 -0.0932738347
crw_3:temperature_loc:precipitation_loc crw_4:temperature_loc:precipitation_loc
0.1068090501 -0.1154302852
crw_1:precipitation_loc crw_2:precipitation_loc
0.2077015754 0.0000000000
crw_3:precipitation_loc crw_4:precipitation_loc
0.2493275989 0.0000000000
crw_1:temperature_loc crw_2:temperature_loc
0.1008813596 -0.1388014272
crw_3:temperature_loc crw_4:temperature_loc
0.0000000000 -0.0475675094
water_grad_1:temperature_loc:precipitation_loc water_grad_2:temperature_loc:precipitation_loc
0.0000000000 -0.0571194608
water_grad_3:temperature_loc:precipitation_loc water_grad_4:temperature_loc:precipitation_loc
0.0798016211 0.0000000000
water_grad_1:precipitation_loc water_grad_2:precipitation_loc
-0.0024288806 0.0950040054
water_grad_3:precipitation_loc water_grad_4:precipitation_loc
0.0000000000 -0.1186417172
water_grad_1:temperature_loc water_grad_2:temperature_loc
-0.0081758525 0.0000000000
water_grad_3:temperature_loc water_grad_4:temperature_loc
-0.1770103227 0.0000000000
ndvi_loc:temperature_loc:precipitation_loc ndvi_loc:precipitation_loc
-0.1075859961 0.0657222466
ndvi_loc:temperature_loc ndvi_grad_1:temperature_loc:precipitation_loc
-0.0120984258 0.0000000000
ndvi_grad_2:temperature_loc:precipitation_loc ndvi_grad_3:temperature_loc:precipitation_loc
0.0000000000 -0.2321655943
ndvi_grad_4:temperature_loc:precipitation_loc ndvi_grad_1:precipitation_loc
0.0008217166 -0.0027839473
ndvi_grad_2:precipitation_loc ndvi_grad_3:precipitation_loc
-0.1179538809 0.0000000000
ndvi_grad_4:precipitation_loc ndvi_grad_1:temperature_loc
0.0068035766 0.0000000000
ndvi_grad_2:temperature_loc ndvi_grad_3:temperature_loc
-0.2217073313 0.0000000000
ndvi_grad_4:temperature_loc masting_loc:temperature_loc:precipitation_loc
0.0000000000 0.2502237969
masting_loc:precipitation_loc masting_loc:temperature_loc
0.0000000000 -0.0407435744
masting_grad_1:temperature_loc:precipitation_loc masting_grad_2:temperature_loc:precipitation_loc
0.0210441721 0.0000000000
masting_grad_3:temperature_loc:precipitation_loc masting_grad_4:temperature_loc:precipitation_loc
0.0000000000 0.1484524078
masting_grad_1:precipitation_loc masting_grad_2:precipitation_loc
-0.0502199947 0.0341172459
masting_grad_3:precipitation_loc masting_grad_4:precipitation_loc
-0.0714737164 0.0000000000
masting_grad_1:temperature_loc masting_grad_2:temperature_loc
0.0306776738 0.0000000000
masting_grad_3:temperature_loc masting_grad_4:temperature_loc
-0.0964010141 0.0348303498
fruit_and_nutsdists_grad_1:temperature_loc:precipitation_loc fruit_and_nutsdists_grad_2:temperature_loc:precipitation_loc
0.0000000000 0.0000000000
fruit_and_nutsdists_grad_3:temperature_loc:precipitation_loc fruit_and_nutsdists_grad_4:temperature_loc:precipitation_loc
-0.3401670539 0.1872092138
fruit_and_nutsdists_grad_1:precipitation_loc fruit_and_nutsdists_grad_2:precipitation_loc
0.1235313483 -0.0810581190
fruit_and_nutsdists_grad_3:precipitation_loc fruit_and_nutsdists_grad_4:precipitation_loc
0.0561933996 -0.0874596649
fruit_and_nutsdists_grad_1:temperature_loc fruit_and_nutsdists_grad_2:temperature_loc
0.1425060741 0.0000000000
fruit_and_nutsdists_grad_3:temperature_loc fruit_and_nutsdists_grad_4:temperature_loc
-0.3269487121 0.0000000000
fruit_and_nuts_loc ndvi_loc
-1.0611488112 -0.2572557622
masting_loc elevation_loc
0.5430828136 -0.4178863853
canopycover_loc base_hour_1
-0.3318493700 0.2264697186
base_hour_2 base_hour_3
0.0000000000 -1.7057952482
base_hour_4 ndvi_grad_1
-0.3282004231 0.0000000000
ndvi_grad_2 ndvi_grad_3
0.0000000000 0.0000000000
ndvi_grad_4 masting_grad_1
0.0148908877 -0.0037137117
masting_grad_2 masting_grad_3
-0.0356481982 0.0000000000
masting_grad_4 elevation_grad_1
0.0996230927 -0.0756440683
elevation_grad_2 elevation_grad_3
0.0975331862 0.0000000000
elevation_grad_4 fruit_and_nutsdists_grad_1
-0.1028042822 -0.2602659910
fruit_and_nutsdists_grad_2 fruit_and_nutsdists_grad_3
0.6584975732 -0.0220173582
fruit_and_nutsdists_grad_4 canopycover_grad_1
-0.3620918625 -0.0023753740
canopycover_grad_2 canopycover_grad_3
0.0273623182 -0.0021535087
canopycover_grad_4 water_grad_1
-0.0274083168 0.0317373349
water_grad_2 water_grad_3
-0.2549622896 0.0000000000
water_grad_4 crw_1
0.0931048160 0.9291095523
crw_2 crw_3
0.7597740210 -0.0929531749
crw_4 temperature_loc
0.6372800436 0.0000000000
precipitation_loc temperature_loc:precipitation_loc
0.0000000000 0.0000000000
# Build a monthly temperature and precipitation predictor
monthtemp = dat[, list(stemp=scale2(temperature_loc), monthofyear)][, list(temperature_loc=mean(stemp)), by=monthofyear]
monthprecip = dat[, list(sprecip=scale2(precipitation_loc), monthofyear)][, list(precipitation_loc=mean(sprecip)), by=monthofyear]
monthtp = cbind(monthtemp, monthprecip[, list(precipitation_loc)])
monthtp[, ("temperature_loc:precipitation_loc"):=temperature_loc*precipitation_loc]
splinehour = s(hourofday, bs="cc", k=df_hour)
pred_hours = Predict.matrix(smooth.construct2(splinehour, tdat, NULL), data.frame(hourofday=sort(unique(tdat$hourofday)))) # B-spline basis for hour effect
dailyres = list()
inmonth = unique(tdat$monthofyear)
plist = c("ndvi_grad_*", "masting_grad_*", "fruit_and_nutsdists_grad_*", "water_grad_*", "crw_*", "base_hour_*")
tpcols = c("temperature_loc", "precipitation_loc", "temperature_loc:precipitation_loc")
#plist = c("canopycover_loc_hour_*")
for(pattern in plist) { # Loop through each pattern
dailyres[[pattern]] = list()
tpbetas = bestbetas[names(bestbetas) %like% pattern]
for(month in inmonth){
basenames = paste0(strsplit(pattern, "*", fixed=T)[[1]], 1:(df_hour - 1))
baseeff = pred_hours %*% t(t(tpbetas[basenames]))
for(tp in tpcols){
tnames = paste0(strsplit(pattern, "*", fixed=T)[[1]], 1:(df_hour - 1), ":", tp)
tpval = as.numeric(monthtp[monthofyear == month, tp, with=F])
teff = (pred_hours*tpval) %*% t(t(tpbetas[tnames]))
baseeff = baseeff + teff
}
effdt = data.frame(hour=0:23, beta=baseeff, coef=pattern)
effdt$monthofyear = month
dailyres[[pattern]][[month]] = effdt
}
}
monthres = do.call(rbind, lapply(dailyres, function(x) do.call(rbind, x)))
tplot = ggplot(monthres) + geom_line(aes(x=hour, y=beta, color=as.factor(monthofyear))) + xlab("Hour of day") + ylab("Effect on pig movement") + theme_bw() + facet_wrap(~coef, scales="free")
tpplot = ggplot(monthtp) + geom_line(aes(x=monthofyear, y=temperature_loc, color="temperature")) +
geom_line(aes(x=monthofyear, y=precipitation_loc, color="precipitation")) + theme_bw() + xlab("Month of year") + ylab("Standardized mean monthy value")
require(gridExtra)
grid.arrange(tplot, tpplot, nrow=1, ncol=2)
Plot how the non-daily varying location-based variables change with temperature
basenames = sapply(strsplit(names(bestbetas), ":", fixed=T), function(x) x[[1]][1])
locparams = bestbetas[basenames %like% "*_loc*"]
seas_lps = c("ndvi_loc", "masting_loc")
loceffects = list()
for(param in seas_lps){
loceffects[[param]] = list()
for(month in inmonth){
baseeff = locparams[param]
for(tp in tpcols){
tpval = as.numeric(monthtp[monthofyear == month, tp, with=F])
baseeff = baseeff + tpval*locparams[paste0(param, ":", tp)]
}
loceffects[[param]][[month]] = data.frame(month=month, coef=param, beta=baseeff)
}
}
monthloc = do.call(rbind, lapply(loceffects, function(x) do.call(rbind, x)))
tplot = ggplot(monthloc) + geom_path(aes(x=month, y=beta)) + xlab("Month of year") + ylab("Effect on pig movement") + theme_bw() + facet_wrap(~coef)
#tplot
tpplot = ggplot(monthtp) + geom_line(aes(x=monthofyear, y=temperature_loc, color="temperature")) +
geom_line(aes(x=monthofyear, y=precipitation_loc, color="precipitation")) + theme_bw() + xlab("Month of year") + ylab("Standardized mean monthy value")
require(gridExtra)
grid.arrange(tplot, tpplot, nrow=1, ncol=2)
alltpres = list()
alltpbestbetas = list()
for(pig in unique(dat$pigID)){
cat("Working on", pig, "\n")
tdat = dat[pigID == pig]
tdat[ , datetime:=as.POSIXct(t, origin = '1970-01-01', tz = 'GMT')]
tdat[ , hourofday:=scale(hour(datetime))]
tdat[ , monthofyear:=month(datetime)]
# Set fngrad to 0 if it is NA
if(all(is.na(tdat$fruit_and_nutsdists_grad)))
fngrad = rep(0, nrow(tdat))
else
fngrad = tdat$fruit_and_nutsdists_grad
tdat$fruit_and_nutsdists_grad = fngrad
df_hour = 5
stdcols = c("ndvi_loc", "ndvi_grad", "masting_loc",
"masting_grad", "water_grad",
"elevation_loc", "elevation_grad",
"fruit_and_nutsdists_grad",
"canopycover_loc", "canopycover_grad", "crw")
nonstdcols = c("fruit_and_nuts_loc")
splinecols = c("ndvi_grad", "masting_grad", "elevation_grad", "fruit_and_nutsdists_grad",
"canopycover_grad", "water_grad", "crw")
seasonalcols = c("fruit_and_nutsdists_grad", "masting_grad", "masting_loc", "ndvi_grad", "ndvi_loc", "water_grad",
"crw")
Xseason = build_tempprecip_design_matrix(tdat, stdcols, nonstdcols, splinecols, seasonalcols, df_hour=df_hour)
registerDoMC(cores=4)
fitseason = cv.glmnet(Xseason, y=tdat$z, offset=log(tdat$tau), family="poisson",
alpha=1, nlambda=20, nfolds=5, parallel=TRUE)
bestbetas = fitseason$glmnet.fit$beta[, which(fitseason$lambda == fitseason$lambda.1se), drop=T]
# Build a monthly temperature and precipitation predictor
monthtemp = dat[, list(stemp=scale2(temperature_loc), monthofyear)][, list(temperature_loc=mean(stemp)), by=monthofyear]
monthprecip = dat[, list(sprecip=scale2(precipitation_loc), monthofyear)][, list(precipitation_loc=mean(sprecip)), by=monthofyear]
monthtp = cbind(monthtemp, monthprecip[, list(precipitation_loc)])
monthtp[, ("temperature_loc:precipitation_loc"):=temperature_loc*precipitation_loc]
splinehour = s(hourofday, bs="cc", k=df_hour)
pred_hours = Predict.matrix(smooth.construct2(splinehour, tdat, NULL), data.frame(hourofday=sort(unique(tdat$hourofday)))) # B-spline basis for hour effect
dailyres = list()
inmonth = unique(tdat$monthofyear)
plist = c("ndvi_grad_*", "masting_grad_*", "fruit_and_nutsdists_grad_*", "water_grad_*", "crw_*", "base_hour_*")
tpcols = c("temperature_loc", "precipitation_loc", "temperature_loc:precipitation_loc")
#plist = c("canopycover_loc_hour_*")
for(pattern in plist) { # Loop through each pattern
dailyres[[pattern]] = list()
tpbetas = bestbetas[names(bestbetas) %like% pattern]
for(month in inmonth){
basenames = paste0(strsplit(pattern, "*", fixed=T)[[1]], 1:(df_hour - 1))
baseeff = pred_hours %*% t(t(tpbetas[basenames]))
for(tp in tpcols){
tnames = paste0(strsplit(pattern, "*", fixed=T)[[1]], 1:(df_hour - 1), ":", tp)
tpval = as.numeric(monthtp[monthofyear == month, tp, with=F])
teff = (pred_hours*tpval) %*% t(t(tpbetas[tnames]))
baseeff = baseeff + teff
}
effdt = data.frame(hour=0:23, beta=baseeff, coef=pattern)
effdt$monthofyear = month
dailyres[[pattern]][[month]] = effdt
}
}
monthres = do.call(rbind, lapply(dailyres, function(x) do.call(rbind, x)))
monthres$pigID = pig
alltpres[[pig]] = monthres
alltpbestbetas[[pig]] = bestbetas
}
allpigstp = as.data.table(do.call(rbind, alltpres))
meanpig = allpigstp[, list(beta=mean(beta)), by=list(monthofyear, hour, coef)]
meanpig$pigID = "meantejonpig"
meanpig = meanpig[, colnames(allpigstp), with=F]
allpigstpfull = rbind(allpigstp, meanpig)
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
tplot = ggplot(allpigstpfull) + geom_line(aes(x=hour, y=beta, color=as.factor(monthofyear))) + xlab("Hour of day") + ylab("Effect on pig movement") + theme_bw() + facet_wrap(~pigID + coef, scales="free", ncol=6, nrow=14) #+ scale_colour_brewer(palette = "Spectral")
tplot
Plot the non-daily coefficients
allpigloceffects = list()
for(pig in names(alltpbestbetas)){
inmonth = unique(dat[pigID == pig]$monthofyear)
bestbetas = alltpbestbetas[[pig]]
basenames = sapply(strsplit(names(bestbetas), ":", fixed=T), function(x) x[[1]][1])
locparams = bestbetas[basenames %like% "*_loc*"]
seas_lps = c("ndvi_loc", "masting_loc")
loceffects = list()
for(param in seas_lps){
loceffects[[param]] = list()
for(month in inmonth){
baseeff = locparams[param]
for(tp in tpcols){
tpval = as.numeric(monthtp[monthofyear == month, tp, with=F])
baseeff = baseeff + tpval*locparams[paste0(param, ":", tp)]
}
loceffects[[param]][[month]] = data.frame(month=month, coef=param, beta=baseeff)
}
}
monthloc = do.call(rbind, lapply(loceffects, function(x) do.call(rbind, x)))
monthloc$pigID = pig
allpigloceffects[[pig]] = monthloc
}
loccoefs = as.data.table(do.call(rbind, allpigloceffects))
mloccoefs = loccoefs[, list(beta=median(beta)), by=list(coef, month) ]
tplot = ggplot(loccoefs) + geom_point(aes(x=month, y=beta)) + geom_smooth(aes(x=month, y=beta), stat="smooth") + xlab("Month of year") + ylab("Effect on pig movement") + theme_bw() + facet_wrap(~coef)
tplot
tpplot = ggplot(monthtp) + geom_line(aes(x=monthofyear, y=temperature_loc, color="temperature")) +
geom_line(aes(x=monthofyear, y=precipitation_loc, color="precipitation")) + theme_bw() + xlab("Month of year") + ylab("Standardized mean monthy value")
require(gridExtra)
grid.arrange(tplot, tpplot, nrow=1, ncol=2)
Wow, really hard to see much consistency.